
#####################################################
#
#               Code to replicate 
#  Taxing the 1 percent: Public opinion vs. public policy 
#   
#                APPENDIX 
#
#    Ruben Berge Mathisen (University of Bergen)
#
#       Tables and Figures for Manuscript
#                June 2023
#
#   Original analyses conducted in R version 3.5.1 
#
#####################################################

# Theme
# Shared ggplot2 theme for the appendix figures: black text, thin axis lines,
# a thin panel border, light grey major grid lines, bold facet strip labels
# (y-strip labels hidden) and a legend placed at the bottom.
# Arguments supplied through `...` are forwarded to theme() after the defaults.
theme_m <- function(...) {
  # Black text at the given point size.
  black_text <- function(pt) element_text(size = pt, colour = "black")
  # Bold 10pt text, shared by facet strips and the legend title.
  bold_small <- function() element_text(size = 10, face = "bold")
  hidden <- function() element_blank()

  theme(
    # Text and axes
    text = black_text(12),
    axis.text = black_text(10),
    axis.title = black_text(11),
    axis.line = element_line(size = 0.3),
    axis.ticks = element_line(colour = "black"),
    axis.ticks.length = unit(1, "mm"),
    # Plot margin and panels
    plot.margin = margin(1, 5, 1, 1, "mm"),
    panel.spacing.x = unit(7.5, "mm"),
    panel.spacing.y = unit(2.5, "mm"),
    panel.background = hidden(),
    panel.border = element_rect(colour = "black", size = 0.2, fill = NA),
    panel.grid.major = element_line(color = "grey93"),
    # Facet strips (an earlier variant showed y strips at angle 0 in bold italic)
    strip.background = hidden(),
    strip.text = bold_small(),
    strip.text.y = hidden(),
    # Legend
    legend.background = hidden(),
    legend.key = hidden(),
    legend.position = "bottom",
    legend.title = bold_small(),
    legend.key.height = unit(4, "mm"),
    legend.title.align = .125,
    ...
  )
}


# Packages 
library(dplyr)
library(ggplot2)
library(haven)
library(foreign)
library(car)
library(readxl)
library(tidyr)
library(gridExtra)
library(lme4)
library(ggeffects)
library(kableExtra)
library(plyr)
library(ggjoy)
library(stargazer)
library(scales)
library(quantreg)
library(ggsci)
library(stringr)
library(readr)

# Colors and shapes
# pal_r: eleven colours addressed by position throughout the script
# (e.g. pal_r[3], pal_r[c(11, 8)]), so the order below must not change.
pal_r <- local({
  # Build the NEJM palette once; the original rebuilt it for each of the four
  # lookups. The requested size (10) is kept as in the original.
  nejm <- pal_nejm("default")(10)
  c(nejm[c(2, 1, 4, 3)],
    "#FED439FF",
    "purple1",
    pal_uchicago("default")(1)[1],
    "skyblue2",
    "palegreen1",
    "lightcoral",
    "midnightblue")
})
# shapes: a cycle of nine point shapes repeated three times (27 values), so
# plots with many groups can slice as many shapes as they need.
shapes <- rep(c(16, 15, 17, 18, 8, 7, 5, 13, 2), times = 3)


# Figure A1: The evolution of the top marginal income tax rate in OECD countries.
# Columns used from the workbook: country, year, tax.
piketty_all <- read_excel(
  "Piketty top marginal tax data, all countries, for appendix.xlsx"
)
# Four focal countries keep their own name; every other country becomes 'Other'.
piketty_all$highlight <- factor(
  car::recode(
    piketty_all$country,
    "'United States'='United States'; 'United Kingdom'='United Kingdom'; 'Norway'='Norway';
                                            'France'='France';else='Other'"
  )
)
# Reorder the sorted factor levels by position; this fixes the legend order and
# which colour/shape each country receives.
piketty_all$highlight <- factor(
  piketty_all$highlight,
  levels = levels(factor(piketty_all$highlight))[c(4, 5, 3, 1, 2)]
)
ggplot(
  data = piketty_all %>% filter(highlight != "Other"),
  mapping = aes(x = year)
) +
  # Background: one light grey line per non-highlighted country.
  geom_line(
    data = piketty_all %>% filter(highlight == "Other"),
    mapping = aes(y = tax * 100, group = country),
    color = "grey90"
  ) +
  # Foreground: coloured lines and markers for the highlighted countries.
  geom_line(aes(y = tax * 100, group = country, color = highlight)) +
  geom_point(
    aes(y = tax * 100, shape = highlight, group = country, color = highlight),
    fill = "white"
  ) +
  scale_x_continuous(breaks = seq(1960, 2010, 5)) +
  scale_y_continuous(
    breaks = seq(0, 100, 10),
    labels = function(x) paste0(x, "%")
  ) +
  scale_color_manual(values = pal_r) +
  scale_shape_manual(values = c(shapes[1:4])) +
  labs(
    x = NULL,
    y = "Top marginal income tax rate",
    color = NULL,
    shape = NULL
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    plot.caption = element_text(
      margin = margin(t = 15),
      size = 10,
      hjust = 0,
      color = "grey30"
    ),
    legend.position = c(0.8, 0.75),
    legend.box.background = element_rect(colour = "black"),
    text = element_text(color = "black"),
    axis.text = element_text(color = "black")
  )


################# STUDY 2 ################# 

# Aggregated registry data on actual tax rates
# Keeps `amount` plus `rate_nowealth` scaled by 100 under the name `predicted`,
# so the registry series can later be stacked with model predictions.
reg_data <- read_excel("Statistics Norway tax rates, for Study 2.xlsx") %>%
  mutate(predicted = rate_nowealth) %>%
  dplyr::select(amount, predicted)
reg_data$predicted <- 100 * reg_data$predicted
# Series label used when this table is plotted next to survey estimates.
reg_data$var <- "Average effective tax rates (2018)"
# Uncertainty columns are filled with NA for the registry series.
reg_data$conf.high <- NA
reg_data$conf.low <- NA
reg_data$std.error <- NA

# Norwegian Citizen Panel, data prep
# NOTE(review): the file name below contains "fro", possibly a typo for "for".
# It is a runtime path, so confirm against the replication archive before
# changing it.
d <- read_excel("NCP19 data, fro Study 2.xlsx")

# Assigned order of income amounts
# (copied from r19pad6_ran; used as the treatment variable for Figure A4)
d$order_group <- d$r19pad6_ran
# Preferred rate
# Each tax_* column is the first non-missing value of a pair of items. The "b"
# items are paired in reverse index order (a_1 with b_10, ..., a_10 with b_1),
# so tax_a ... tax_j refer to the same position regardless of which battery
# holds the answer.
d <- d %>% mutate(tax_a=coalesce(r19pad6a_1,
                                 r19pad6b_10),
                  tax_b=coalesce(r19pad6a_2,
                                 r19pad6b_9),
                  tax_c=coalesce(r19pad6a_3,
                                 r19pad6b_8),
                  tax_d=coalesce(r19pad6a_4,
                                 r19pad6b_7),
                  tax_e=coalesce(r19pad6a_5,
                                 r19pad6b_6),
                  tax_f=coalesce(r19pad6a_6,
                                 r19pad6b_5),
                  tax_g=coalesce(r19pad6a_7,
                                 r19pad6b_4),
                  tax_h=coalesce(r19pad6a_8,
                                 r19pad6b_3),
                  tax_i=coalesce(r19pad6a_9,
                                 r19pad6b_2),
                  tax_j=coalesce(r19pad6a_10,
                                 r19pad6b_1))
# Tax knowledge
# Attaches the registry rate (`predicted`) for the income amount each respondent
# was asked to guess, then measures how far the guess is from that rate.
tax_tab <- reg_data %>% dplyr::select(predicted)
# The row position of the registry table is the join key; it is matched against
# the value of r19pad7_ran. row.names() returns character, and merge() matches
# the numeric key against it by value.
tax_tab$knowledge_amount <- row.names(tax_tab)
d$knowledge_amount <- d$r19pad7_ran
# Left join: respondents without a matching key keep NA in `predicted`.
d <- merge(d,tax_tab,by="knowledge_amount",all.x = T)
# Labelled version of the guessed amount; labels are applied in the sorted order
# of the values of r19pad7_ran (assumes ten distinct values — TODO confirm).
d$guess_group <- factor(d$r19pad7_ran,labels=c("$11,000","$28,000","$55,000","$83,000","$110,000","$220,000","$550,000",
                                               "$1,100,000","$5,500,000","$11,000,000"))
d$guess <- d$r19pad7_1
# Signed guessing error (guess minus registry rate) and its absolute value.
d$knowledge_nonabs <- d$r19pad7_1 - d$predicted
d$knowledge <- abs(d$knowledge_nonabs)
# ntile() is applied to the negated error, so tile 5 holds the smallest absolute
# errors; those respondents are coded 1, tiles 1-4 are coded 0. Note that the
# split is into five groups despite the "_dec" suffix.
d$knowledge_dec <- car::recode(ntile(-d$knowledge,5),"1:4=0;5=1;else=NA")
# Difference between the preferred rate at the last and the first amount.
d$progressivity <- d$tax_j-d$tax_a
# Background
# Income: codes 97 and 98 are set to missing (presumably non-substantive
# answers — confirm against the codebook), then split into ten ntile() groups.
d$resp_inc_old<-car::recode(d$r19pad8,"97:98=NA")
d$resp_inc<-as.factor(ntile(as.numeric(d$resp_inc_old),10))
# Education: codes 12 through 97 are set to missing.
d$edu_big <- as.factor(as.numeric(car::recode(d$r19P4_2,"12:97=NA")))
# Dummy for the top income group (group 10 = 1, groups 1-9 = 0).
d$resp_inc_10 <- car::recode(as.numeric(d$resp_inc),"10=1; 1:9=0; else=NA")
# Same construction for education: ten ntile() groups, then a top-group dummy.
d$edu_d<-as.factor(ntile(as.numeric(d$edu_big),10))
d$edu_10 <- car::recode(as.numeric(d$edu_d),"10=1; 1:9=0; else=NA")
# Occupation: codes 7, 97 and 98 are set to missing.
d$occupation <- as.factor(car::recode(d$c18bk21,"97:98=NA;7=NA"))
d$age <- factor(d$r19P5_1)
d$gender <- factor(d$r19P1)
d$region <- factor(d$r19P2)
# Percentile income variable
# Frequency table of the income categories (table() leaves out NA), converted
# to proportions and cumulative proportions.
d1 <- data.frame(table(d$resp_inc_old))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1 <- d1 %>% dplyr::rename(resp_inc_old=Var1,resp_inc_percentile=score) %>% dplyr::select(resp_inc_old,resp_inc_percentile)
# Left join of the midpoint scores onto the respondent data by income category.
d <- merge(d,d1,by="resp_inc_old",all.x=T)
# Party and ideology
# Party names from the numeric party code; every other code becomes NA.
d$party<- car::recode(d$r19pk204,"1='Christ. Dem. Party'; 2='Conservative Party'; 3='Progress Party'; 4='Liberal Party'; 5='Soc. Left Party'; 6='Centre Party'; 7='Green Party'; 8='Labor Party'; 9='Red Party';else=NA")
# Reorder the sorted party levels by position; this order is used for the
# legend of Figure A8.
d$party <- factor(d$party, levels(factor(d$party))[c(8,4,9,5,1,2,3,7,6)])
# Three-way grouping of party codes: 5, 8, 9 -> 1; 1, 4, 6, 7 -> 2; 2, 3 -> 3.
d$party_cat <- as.factor(car::recode(as.numeric(d$r19pk204),"5=1; 8:9=1; 1=2; 4=2; 6:7=2;  2:3=3; else=NA "))
# Dummy: codes 2 and 3 (Conservative Party, Progress Party) = 1, other parties = 0.
d$party_right <- car::recode(as.numeric(d$r19pk204),"2:3=1; 4:9=0; 1=0; else=NA ")
# Keep a copy of the respondent-level data before stacking.
ncp <- d
# Stacked dataset
# One row per respondent x income amount: tax_a ... tax_j are gathered into the
# key column `amount` and the value column `tax`.
dm <- d %>% gather(amount,tax,tax_a:tax_j)
# Lookup table per amount: registry rate (mean2), display label (amount_lab)
# and a numeric amount used for ordering (amount_num). Rows are paired with
# tax_a ... tax_j by position, so reg_data is assumed to hold its ten rows in
# ascending amount order — TODO confirm.
tax_tab <- reg_data %>% dplyr::select(predicted) %>% mutate(mean2=predicted,predicted=NULL)
tax_tab$amount <- c("tax_a","tax_b","tax_c","tax_d","tax_e",
                    "tax_f","tax_g","tax_h","tax_i","tax_j")
tax_tab$amount_lab <- factor(c("$11,000",
                               "$28,000",
                               "$55,000",
                               "$83,000",
                               "$110,000",
                               "$220,000",
                               "$550,000",
                               "$1,100,000",
                               "$5,500,000",
                               "$11,000,000"),
                             levels=c("$11,000",
                                      "$28,000",
                                      "$55,000",
                                      "$83,000",
                                      "$110,000",
                                      "$220,000",
                                      "$550,000",
                                      "$1,100,000",
                                      "$5,500,000",
                                      "$11,000,000"))
# NOTE(review): these values are ~9x the dollar labels above, presumably the
# amounts in the survey's original currency — confirm.
tax_tab$amount_num <- c(100000,250000,500000,750000,1000000,
                        2000000,5000000,10000000,50000000,100000000)
# Registry series in the column layout (group, predicted, x) used when it is
# appended to model predictions for Figures A8 and A11.
tax_tab1 <- tax_tab %>% 
  mutate(group=amount_lab,
         predicted=mean2,
         x="Average effective tax rates (2018)") %>%
  dplyr::select(group,predicted,x)
dm <- merge(dm,tax_tab,by="amount",all.x = T)
# Signed and absolute gap between the preferred rate and the registry rate.
dm$deviation_nonabs <- dm$tax - dm$mean2
dm$deviation <- abs(dm$deviation_nonabs)
# 1 if the preferred rate is above the registry rate, 0 if below; an exact tie
# (gap of 0) is coded NA.
dm$deviation_dic <- ifelse(dm$deviation_nonabs>0,1,ifelse(dm$deviation_nonabs<0,0,NA))
# 1 if the preferred rate exceeds the registry rate by at least 5 / 10 / 15
# points, otherwise 0 (NA only where the gap itself is NA).
dm$deviation_dic_5 <- ifelse(dm$deviation_nonabs>=5,1,ifelse(dm$deviation_nonabs<5,0,NA))
dm$deviation_dic_10 <- ifelse(dm$deviation_nonabs>=10,1,ifelse(dm$deviation_nonabs<10,0,NA))
dm$deviation_dic_15 <- ifelse(dm$deviation_nonabs>=15,1,ifelse(dm$deviation_nonabs<15,0,NA))

# Figure A2: Distributions of preferred tax rates for different annual incomes
# One ridge per income amount; ridges are ordered by descending numeric amount
# on the y axis.
ggplot(dm, aes(x = tax, y = reorder(amount_lab, -amount_num))) +
  geom_joy(scale = 3, alpha = 0.9) +
  scale_x_continuous(
    limits = c(0, 100),
    breaks = seq(0, 100, 10),
    labels = function(x) paste0(x)
  ) +
  labs(
    x = "Preferred tax rate (%)",
    y = "Annual taxable income"
  ) +
  theme_m() +
  theme(panel.grid.minor = element_blank())


# Figure A3: Standard deviation of tax rate preferences for different annual incomes
# Standard deviation of the preferred rate within each income amount.
plot_data <- dm %>%
  group_by(amount_lab) %>%
  dplyr::summarise(dev = sd(tax, na.rm = T))
ggplot(plot_data, aes(x = amount_lab, y = dev)) +
  # group = 1 joins the discrete x positions into a single line.
  geom_line(size = 1, group = 1) +
  geom_point(size = 3, group = 1) +
  annotate(
    "label",
    x = 3.4,
    y = 12,
    label = "Standard deviation of\npreferred tax rate",
    label.size = NA
  ) +
  # No shape aesthetic is mapped in this figure; the scale is kept as in the
  # original script.
  scale_shape_manual(values = c(17, 15, 16)) +
  # Limits are set on the scale here; this figure adds no coord_cartesian()
  # brackets below the axis, unlike Figures A8 and A11.
  scale_y_continuous(breaks = seq(0, 60, 2), limits = c(0, 20)) +
  labs(
    x = NULL,
    y = "Tax rate (%)",
    color = NULL,
    shape = NULL
  ) +
  theme_m() +
  theme(
    panel.grid.major = element_line(color = "grey95"),
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 12)
  )


# Figure A4: Limited effect of the order of incomes on average preferred tax rates
# funn(x): for the income amount labelled `x`, regress the preferred rate on the
# assigned order group plus controls (weighted by r19Weight2) and return a
# one-row data frame with the estimate (`coef`), its standard error (`se`) and
# the amount label (`group`). Row 2 of the coefficient table is the first
# coefficient after the intercept, i.e. the one for `order_group`.
funn <- function(x) {
  rows <- dm %>% filter(amount_lab == x)
  fit <- lm(
    tax ~ order_group +
      factor(resp_inc_old) +
      edu_big +
      occupation +
      age +
      gender,
    weights = r19Weight2,
    data = rows
  )
  est <- summary(fit)$coefficients
  out <- data.frame(coef = est[2, 1], se = est[2, 2])
  out$group <- x
  out
}
# One estimate per income amount, stacked into a single data frame.
dat3 <- do.call(rbind, lapply(unique(dm$amount_lab), funn))
dat3$type <- "Difference between (tax rate in high-low-order group) and (tax\nrate in low-high-order group), with 95% confidence intervals"
ggplot(dat3, aes(x = group, y = coef, group = type, color = type, shape = type)) +
  geom_hline(yintercept = 0) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  # Thin lines mark the estimate +/- 1.96 standard errors.
  geom_line(aes(y = coef + se * 1.96), size = 0.3) +
  geom_line(aes(y = coef - se * 1.96), size = 0.3) +
  scale_color_manual(values = pal_r[3]) +
  scale_shape_manual(values = 15) +
  scale_y_continuous(
    limits = c(-7, 8),
    breaks = seq(-20, 20, 1),
    labels = function(x) paste0(x, "%")
  ) +
  labs(
    x = "Annual taxable income",
    y = NULL,
    color = NULL,
    shape = NULL
  ) +
  theme_m() +
  theme(
    legend.box.background = element_rect(color = "black", fill = "white"),
    legend.position = c(0.5, 0.8),
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 12)
  )


# Figure A5: Actual tax rates for high incomes are closer to the preferences of high income respondents than the rest.
# funn(x): for the income amount labelled `x`, regress the absolute deviation
# between preferred and registry rate on the top-income dummy plus controls
# (weighted by r19Weight2). Returns a one-row data frame with the estimate
# (`coef`), its standard error (`se`) and the amount label (`group`). Row 2 of
# the coefficient table is the first coefficient after the intercept, i.e. the
# one for `resp_inc_10`.
funn <- function(x) {
  rows <- dm %>% filter(amount_lab == x)
  fit <- lm(
    deviation ~ resp_inc_10 +
      knowledge +
      edu_big +
      occupation +
      age +
      gender +
      region,
    weights = r19Weight2,
    data = rows
  )
  est <- summary(fit)$coefficients
  out <- data.frame(coef = est[2, 1], se = est[2, 2])
  out$group <- x
  out
}
# One estimate per income amount, stacked into a single data frame.
dat3 <- do.call(rbind, lapply(unique(dm$amount_lab), funn))
dat3$type <- "Difference between (tax rate deviation for top 10% income\nrespondents) and (tax rate deviation for bottom 90%),\nafter controls (with 95% confidence intervals)"
ggplot(dat3, aes(x = group, y = coef, group = type, color = type, shape = type)) +
  geom_hline(yintercept = 0) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  # Thin lines mark the estimate +/- 1.96 standard errors.
  geom_line(aes(y = coef + se * 1.96), size = 0.3) +
  geom_line(aes(y = coef - se * 1.96), size = 0.3) +
  scale_color_manual(values = pal_r[2]) +
  scale_shape_manual(values = 15) +
  scale_y_continuous(
    limits = c(-10, 7),
    breaks = seq(-20, 20, 1),
    labels = function(x) paste0(x, "%")
  ) +
  labs(
    x = "Annual taxable income",
    y = NULL,
    color = NULL,
    shape = NULL
  ) +
  theme_m() +
  theme(
    legend.box.background = element_rect(color = "black", fill = "white"),
    legend.position = c(0.5, 0.8),
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 12)
  )


# Figure A6: Actual tax rates for high incomes are farther away from the preferences of the highly educated than the rest
# funn(x): for the income amount labelled `x`, regress the absolute deviation
# between preferred and registry rate on the top-education dummy plus controls
# (weighted by r19Weight2). Returns a one-row data frame with the estimate
# (`coef`), its standard error (`se`) and the amount label (`group`). Row 2 of
# the coefficient table is the first coefficient after the intercept, i.e. the
# one for `edu_10`.
funn <- function(x) {
  rows <- dm %>% filter(amount_lab == x)
  fit <- lm(
    deviation ~ edu_10 +
      knowledge +
      factor(resp_inc_old) +
      occupation +
      age +
      gender +
      region,
    weights = r19Weight2,
    data = rows
  )
  est <- summary(fit)$coefficients
  out <- data.frame(coef = est[2, 1], se = est[2, 2])
  out$group <- x
  out
}
# One estimate per income amount, stacked into a single data frame.
dat3 <- do.call(rbind, lapply(unique(dm$amount_lab), funn))
dat3$type <- "Difference between (tax rate deviation for top 10% educated\nrespondents) and (tax rate deviation for bottom 90%),\nafter controls (with 95% confidence intervals)"
ggplot(dat3, aes(x = group, y = coef, group = type, color = type, shape = type)) +
  geom_hline(yintercept = 0) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  # Thin lines mark the estimate +/- 1.96 standard errors.
  geom_line(aes(y = coef + se * 1.96), size = 0.3) +
  geom_line(aes(y = coef - se * 1.96), size = 0.3) +
  scale_color_manual(values = pal_r[6]) +
  scale_shape_manual(values = 15) +
  scale_y_continuous(
    limits = c(-10, 15),
    breaks = seq(-20, 20, 2),
    labels = function(x) paste0(x, "%")
  ) +
  labs(
    x = "Annual taxable income",
    y = NULL,
    color = NULL,
    shape = NULL
  ) +
  theme_m() +
  theme(
    legend.box.background = element_rect(color = "black", fill = "white"),
    legend.position = c(0.5, 0.2),
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 12)
  )

# Figure A7: Actual tax rates for high incomes are closer to the preferences of right-party voters than the rest.
# funn(x): for the income amount labelled `x`, regress the absolute deviation
# between preferred and registry rate on the right-party dummy plus controls
# (weighted by r19Weight2). Returns a one-row data frame with the estimate
# (`coef`), its standard error (`se`) and the amount label (`group`). Row 2 of
# the coefficient table is the first coefficient after the intercept, i.e. the
# one for `party_right`.
funn <- function(x) {
  rows <- dm %>% filter(amount_lab == x)
  fit <- lm(
    deviation ~ party_right +
      knowledge +
      factor(resp_inc_old) +
      edu_big +
      occupation +
      age +
      gender +
      region,
    weights = r19Weight2,
    data = rows
  )
  est <- summary(fit)$coefficients
  out <- data.frame(coef = est[2, 1], se = est[2, 2])
  out$group <- x
  out
}
# One estimate per income amount, stacked into a single data frame.
dat3 <- do.call(rbind, lapply(unique(dm$amount_lab), funn))
dat3$type <- "Difference between (tax rate deviation for right-wing\nrespondents) and (tax rate deviation for all others),\nafter controls (with 95% confidence intervals)"
ggplot(dat3, aes(x = group, y = coef, group = type, color = type, shape = type)) +
  geom_hline(yintercept = 0) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  # Thin lines mark the estimate +/- 1.96 standard errors.
  geom_line(aes(y = coef + se * 1.96), size = 0.3) +
  geom_line(aes(y = coef - se * 1.96), size = 0.3) +
  scale_color_manual(values = pal_r[1]) +
  scale_shape_manual(values = 15) +
  scale_y_continuous(
    limits = c(-10, 7),
    breaks = seq(-20, 20, 2),
    labels = function(x) paste0(x, "%")
  ) +
  labs(
    x = "Annual taxable income",
    y = NULL,
    color = NULL,
    shape = NULL
  ) +
  theme_m() +
  theme(
    legend.box.background = element_rect(color = "black", fill = "white"),
    legend.position = c(0.5, 0.8),
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 12)
  )


# Figure A8: Actual tax rates compared to preferred tax rates, by respondent party ID
# Weighted model of the preferred rate with a full amount x party interaction;
# ggemmeans() then gives one predicted rate per party and income amount.
m<-lm(tax~amount_lab*party,weights=r19Weight2,data=dm)
pred_party <- data.frame(ggemmeans(m, terms = c("party","amount_lab")))
# Append the registry series; rbind.fill() fills the columns it lacks with NA.
pred_party <- rbind.fill(pred_party,tax_tab1)
# Reorder the sorted series levels by position. Sorted position 1 is moved last
# (presumably the registry series, which gets shape 16 and "black" below —
# confirm, since the sort order depends on the locale).
pred_party$x <- factor(pred_party$x, levels(factor(pred_party$x))[c(9,5,10,6,2,3,7,4,8,1)])

p<-ggplot(pred_party,aes(x=group,y=predicted,color=x,shape=x,group=x)) +
  theme_bw() +
  geom_line(size=0.8) +
  scale_shape_manual(values = c(shapes[1:9],16)) +
  geom_point(size=2,fill="white") +
  scale_y_continuous(breaks=seq(0,70,10),labels=function(x) paste0(x,"%")) +
  theme(legend.position = "right",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(color="black"),
        axis.text = element_text(color="black")) +
  labs(x=NULL,y="Tax rate",color="Party ID",
       shape="Party ID") +
  scale_color_manual(values=c(pal_r[c(7,3,6,2,4,5,9,1,11)],"black"))
# Settings for the brackets drawn below the x axis: `col` is their colour and
# `l2` the y position of the bracket lines (negative, i.e. below the panel).
# `l1` is not used in the code below.
col <- "black"
l1 <- -17
l2 <- -22
# clip = "off" lets the annotations be drawn outside the panel area; the bottom
# plot margin is enlarged to make room for them.
p + coord_cartesian(ylim = c(0,65), clip = "off") + # Make sure inside of plot is not changed
  theme(plot.margin=unit(c(0.5,0.5,1.5,0.5),"cm")) + # Make room
  annotate("text", x = 2.85, y = l2-2.5, label = "Bottom 99 percent",color=col) +
  geom_segment(x=0.4,y=l2,xend=5.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 8.2, y = l2-2.5, label = "Top 1 percent",color=col) +
  geom_segment(x=6,y=l2,xend=10.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 5.5, y = l2-8, label = "Annual taxable income")


################# STUDY 3 ################# 

# Load data
yg <- read_sav("YouGov survey, for Study 3.sav")
# Data prep
## Labor income
## tax_a_l ... tax_j_l: first non-missing of UB1_<k>_1 and UB4_<k>_1, k = 1..10.
for (idx in seq_len(10)) {
  yg[[paste0("tax_", letters[idx], "_l")]] <- coalesce(
    yg[[paste0("UB1_", idx, "_1")]],
    yg[[paste0("UB4_", idx, "_1")]]
  )
}
## Capital income
## tax_a_c ... tax_j_c: first non-missing of UB2_<k>_1 and UB3_<k>_1, k = 1..10.
for (idx in seq_len(10)) {
  yg[[paste0("tax_", letters[idx], "_c")]] <- coalesce(
    yg[[paste0("UB2_", idx, "_1")]],
    yg[[paste0("UB3_", idx, "_1")]]
  )
}
## Combine and recode
## The 20 new columns are contiguous (labor a-j, then capital a-j), so the
## range tax_a_l:tax_j_c selects exactly them.
yg_long <- yg %>%
  gather(var, val, tax_a_l:tax_j_c)
## Last character of the column name is the income source ("c" or "l"); the
## first five characters ("tax_a" ... "tax_j") identify the income amount.
## Labels are applied in the sorted order of those codes.
yg_long$source <- factor(
  str_sub(yg_long$var, -1, -1),
  labels = c("Capital income", "Labor income")
)
yg_long$amount <- factor(
  str_sub(yg_long$var, 1, 5),
  labels = c("$11,000", "$28,000", "$55,000", "$83,000", "$110,000",
             "$220,000", "$550,000", "$1,100,000", "$5,500,000", "$11,000,000")
)
yg_long$income <- yg_long$household_income_rc
## Party: codes 10 and above are set to missing.
yg_long$party <- factor(car::recode(yg_long$FT_next, "10:hi=NA"))


# Figure A9: Preferred and effective tax rates for labor and capital income, only right-wing voters (H, FRP).
# Weighted mean preferred rate per income source and amount for respondents
# with FT_next in 2:3, with a standard error for the interval lines.
# The standard error now ignores missing answers, consistent with the mean
# (na.rm). Previously a single NA in a cell made `se`, and with it both interval
# lines, NA for that cell. Results are unchanged when there are no NAs.
# NOTE(review): the standard error is unweighted although the mean is weighted.
d1 <- yg_long %>%
  filter(FT_next %in% 2:3) %>%
  group_by(source, amount) %>%
  dplyr::summarise(mean = weighted.mean(val, w = weight, na.rm = TRUE),
                   se = sqrt(var(val, na.rm = TRUE) / sum(!is.na(val))))
# Interval bounds: mean +/- 2.58 standard errors (the normal multiplier for a
# 99% interval).
d1$conf.high <- d1$mean + d1$se * 2.58
d1$conf.low <- d1$mean - d1$se * 2.58

# Official tax rates per income amount: one labor rate and two capital rates.
tax_off <- read_excel("Tax rates, Skatteetaten, for Study 3.xlsx")
tax_off <- dplyr::select(tax_off, amount_num, labor, capital_low, capital_high)
# Display labels are applied in the sorted order of amount_num so that they
# line up with the `amount` factor of the survey data.
tax_off$amount <- factor(
  tax_off$amount_num,
  labels = c("$11,000", "$28,000", "$55,000", "$83,000", "$110,000",
             "$220,000", "$550,000", "$1,100,000", "$5,500,000", "$11,000,000")
)

# Labor
# Join the survey means for labor income with the official labor rate, then
# stack both series (mean, labor) into long form for plotting.
d1_l <- merge(d1%>%filter(source=="Labor income"),
              tax_off%>%dplyr::select(amount,labor),
              by=c("amount")) %>%
  gather(var,val,c(mean,labor))
d1_l$var <- car::recode(d1_l$var,"'mean'='Average preferred tax rate on labor income';
                        'labor'='Actual tax rate on labor income'")
# Swap the two sorted levels so the preferred-rate series comes first.
d1_l$var<-factor(d1_l$var,levels(factor(d1_l$var))[c(2,1)])
d1_l <- arrange(d1_l,d1_l$var) # To get the ideal order of layers
# Left panel. The thin lines are the interval bounds of the survey mean, drawn
# in the same colour (pal_r[11]) as the first series.
p1<-ggplot(d1_l,aes(amount,val)) +
  theme_m() +
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[11]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[11]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(16,15)) +
  scale_linetype_manual(values=c("solid","solid")) +
  scale_color_manual(values=c(pal_r[c(11,8)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE(review): limits are set here; the earlier warning against limits concerned coord_cartesian() x-axis brackets, which this panel does not add
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,0.5,0.5,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y="Tax rate",
       color=NULL,linetype=NULL,shape=NULL,
       title="Labor income")
# Capital
# Join the survey means for capital income with the two official capital rates,
# then stack the three series (mean, capital_high, capital_low) into long form.
d1_c <- merge(d1%>%filter(source=="Capital income"),
              tax_off%>%dplyr::select(amount,capital_high,capital_low),
              by=c("amount")) %>%
  gather(var,val,c(mean,capital_high,capital_low)) 
d1_c$var <- car::recode(d1_c$var,"'mean'='Average preferred tax rate on capital income';
                        'capital_high'='Actual tax rate on capital income (gains/dividends)';'capital_low'='Actual tax rate on capital income (interest/rents)'")
# Reorder the sorted levels by position (2, 1, 3); the preferred-rate series
# ends up last and takes the third shape/colour below.
d1_c$var<-factor(d1_c$var,levels(factor(d1_c$var))[c(2,1,3)])
d1_c <- arrange(d1_c,d1_c$var) # To get the ideal order of layers
# Right panel. The thin lines are the interval bounds of the survey mean, drawn
# in the same colour (pal_r[7]) as the third series.
p2<-ggplot(d1_c,aes(amount,val)) +
  theme_m() + 
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[7]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[7]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(15,17,16)) +
  scale_linetype_manual(values=c("solid","solid","solid")) +
  scale_color_manual(values=c(pal_r[c(4,10,7)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE(review): limits are set here; the earlier warning against limits concerned coord_cartesian() x-axis brackets, which this panel does not add
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,1,0.3,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y=NULL,
       color=NULL,linetype=NULL,shape=NULL,
       title="Capital income")
# Labor panel (p1) and capital panel (p2) side by side.
grid.arrange(p1,p2,ncol=2)

# Figure A10: Preferred and effective tax rates for labor and capital income, only high income citizens (household income > USD 77,000.)
# Weighted mean preferred rate per income source and amount for respondents
# with household_income_rc == 3, with a standard error for the interval lines.
# The standard error now ignores missing answers, consistent with the mean
# (na.rm). Previously a single NA in a cell made `se`, and with it both interval
# lines, NA for that cell. Results are unchanged when there are no NAs.
# NOTE(review): the standard error is unweighted although the mean is weighted.
d1 <- yg_long %>%
  filter(household_income_rc == 3) %>%
  group_by(source, amount) %>%
  dplyr::summarise(mean = weighted.mean(val, w = weight, na.rm = TRUE),
                   se = sqrt(var(val, na.rm = TRUE) / sum(!is.na(val))))
# Interval bounds: mean +/- 2.58 standard errors (the normal multiplier for a
# 99% interval).
d1$conf.high <- d1$mean + d1$se * 2.58
d1$conf.low <- d1$mean - d1$se * 2.58
# Official tax rates per income amount: one labor rate and two capital rates.
# Reads the same workbook as Figure A9. The original pointed at a
# machine-specific "Downloads/" copy under a different file name, which does
# not resolve relative to the replication folder like the script's other inputs.
tax_off <- read_excel("Tax rates, Skatteetaten, for Study 3.xlsx") %>%
  dplyr::select(amount_num, labor, capital_low, capital_high)
# Display labels are applied in the sorted order of amount_num so that they
# line up with the `amount` factor of the survey data.
tax_off$amount <- factor(tax_off$amount_num,labels=c("$11,000","$28,000","$55,000","$83,000","$110,000","$220,000","$550,000","$1,100,000","$5,500,000","$11,000,000"))
# Labor
# Join the survey means for labor income with the official labor rate, then
# stack both series (mean, labor) into long form for plotting.
d1_l <- merge(d1%>%filter(source=="Labor income"),
              tax_off%>%dplyr::select(amount,labor),
              by=c("amount")) %>%
  gather(var,val,c(mean,labor))
d1_l$var <- car::recode(d1_l$var,"'mean'='Average preferred tax rate on labor income';
                        'labor'='Actual tax rate on labor income'")
# Swap the two sorted levels so the preferred-rate series comes first.
d1_l$var<-factor(d1_l$var,levels(factor(d1_l$var))[c(2,1)])
d1_l <- arrange(d1_l,d1_l$var) # To get the ideal order of layers
# Left panel. The thin lines are the interval bounds of the survey mean, drawn
# in the same colour (pal_r[11]) as the first series.
p1<-ggplot(d1_l,aes(amount,val)) +
  theme_m() +
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[11]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[11]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(16,15)) +
  scale_linetype_manual(values=c("solid","solid")) +
  scale_color_manual(values=c(pal_r[c(11,8)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE(review): limits are set here; the earlier warning against limits concerned coord_cartesian() x-axis brackets, which this panel does not add
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,0.5,0.5,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y="Tax rate",
       color=NULL,linetype=NULL,shape=NULL,
       title="Labor income")
# Capital
# Join the survey means for capital income with the two official capital rates,
# then stack the three series (mean, capital_high, capital_low) into long form.
d1_c <- merge(d1%>%filter(source=="Capital income"),
              tax_off%>%dplyr::select(amount,capital_high,capital_low),
              by=c("amount")) %>%
  gather(var,val,c(mean,capital_high,capital_low)) 
d1_c$var <- car::recode(d1_c$var,"'mean'='Average preferred tax rate on capital income';
                        'capital_high'='Actual tax rate on capital income (gains/dividends)';'capital_low'='Actual tax rate on capital income (interest/rents)'")
# Reorder the sorted levels by position (2, 1, 3); the preferred-rate series
# ends up last and takes the third shape/colour below.
d1_c$var<-factor(d1_c$var,levels(factor(d1_c$var))[c(2,1,3)])
d1_c <- arrange(d1_c,d1_c$var) # To get the ideal order of layers
# Right panel. The thin lines are the interval bounds of the survey mean, drawn
# in the same colour (pal_r[7]) as the third series.
p2<-ggplot(d1_c,aes(amount,val)) +
  theme_m() + 
  geom_line(aes(linetype=var,color=var,group=var),size=1) +
  geom_line(aes(y=conf.low,group=1),size=0.3,color=pal_r[7]) +
  geom_line(aes(y=conf.high,group=1),size=0.3,color=pal_r[7]) +  
  geom_point(aes(shape=var,color=var),size=2.3) +
  scale_shape_manual(values=c(15,17,16)) +
  scale_linetype_manual(values=c("solid","solid","solid")) +
  scale_color_manual(values=c(pal_r[c(4,10,7)])) +
  scale_y_continuous(breaks=seq(0,60,10),
                     limits=c(0,50),labels=function(x)paste0(x,"%")) + # NOTE(review): limits are set here; the earlier warning against limits concerned coord_cartesian() x-axis brackets, which this panel does not add
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "bottom",
        axis.text.x = element_text(angle=45, hjust=1),
        text = element_text(size=12),
        plot.margin=unit(c(0.3,1,0.3,0.3),"cm")) +
  guides(color=guide_legend(ncol=1,reverse=T),
         linetype=guide_legend(ncol=1,reverse=T),
         shape=guide_legend(ncol=1,reverse=T)) +
  labs(x=NULL,
       y=NULL,
       color=NULL,linetype=NULL,shape=NULL,
       title="Capital income")
# Labor panel (p1) and capital panel (p2) side by side.
grid.arrange(p1,p2,ncol=2)


# Figure A11: Preferred vs. actual vs. perceived actual tax rates.
# funn(x): weighted mean preferred tax rate for the income amount labelled `x`.
# Fits an intercept-only weighted regression of `tax` (weights r19Weight2) on
# the rows of `dm` with amount_lab == x and returns a one-row data frame with
# the intercept (`predicted`), its standard error (`se`) and the amount label
# (`group`).
# The original formula was `tax ~ tax`: R drops a response that reappears on
# the right-hand side (with warnings) and fits the intercept-only model anyway,
# so `tax ~ 1` gives the same estimate without the warnings.
funn <- function(x) {
  # which() drops rows where the comparison is NA, as dplyr::filter() did.
  d1 <- dm[which(dm$amount_lab == x), , drop = FALSE]
  m1 <- lm(tax ~ 1, data = d1, weights = r19Weight2)
  est <- summary(m1)$coefficients
  dat <- data.frame(predicted = est[1, 1], se = est[1, 2])
  dat$group <- x
  dat
}
# Series 1: weighted mean preferred rate per income amount (from funn), with
# bounds at +/- 1.96 standard errors.
p<-do.call(rbind,(lapply(unique(dm$amount_lab),funn)))
p$conf.high <- p$predicted + p$se*1.96
p$conf.low <- p$predicted - p$se*1.96
p$x <- "Average preferred"
# Series 2: registry rates; rbind.fill() fills the columns it lacks with NA.
p1 <- rbind.fill(p,tax_tab1)
# Series 3: weighted mean guess of the actual rate per guessed amount, from the
# respondent-level data `d`.
m <- lm(guess~guess_group,weight=r19Weight2,data=d)
p2 <- sjlabelled::as_label(ggemmeans(m,terms=c("guess_group")))
# Drop the sixth column of the prediction table (presumably its `group`
# column — confirm), then rename to the layout used by p1.
p2 <- p2[,-6] %>% 
  dplyr::rename(group=x,
                se=std.error) %>%
  mutate(x="Guess")
p1 <- rbind(p1,p2)
# Reorder the sorted series levels by position (2, 3, 1); shapes and colours
# below are assigned in this order.
p1$x <- factor(p1$x,levels(factor(p1$x))[c(2,3,1)])
p1 <- arrange(p1,p1$x) # To get the ideal order of layers
# From here on `p` holds the plot object rather than the estimates table.
p<-ggplot(p1,aes(x=group,
                 y=predicted,
                 color=x,
                 shape=x,
                 group=x)) +
  theme_m() +
  geom_line(size=1) +
  geom_line(aes(y=conf.low),size=0.3) +
  geom_line(aes(y=conf.high),size=0.3) +
  geom_point(size=3,fill="white") +
  scale_shape_manual(values=c(17,16,15)) +
  scale_y_continuous(breaks=seq(0,60,10),labels=function(x)paste0(x,"%")) + # NOTE: Do not set limits here; they break the extra x-axis brackets drawn with coord_cartesian() below
  theme(panel.grid.major = element_line(color="grey95"),
        legend.position = "none",
        #legend.box.background = element_rect(colour = "black",fill="white"),
        axis.text.x = element_text(angle=45, hjust=1)) +
  labs(x=NULL,y="Tax rate",color=NULL,
       shape=NULL) +
  scale_color_manual(values=c(pal_r[c(1,4)],"black")) +
  # The legend is hidden; each series is labelled in the panel, with an arrow
  # pointing from the label to its line.
  annotate("label",x=6.3,y=51,label="Citizens' preferred tax rates",label.size=NA,size=3) +
  annotate("label",x=8,y=16,label="Actual effective\ntax rates (2018)",label.size=NA,size=3) +
  annotate("label",x=2.5,y=45,label="Citizens' guess for\nthe actual rate",label.size=NA,size=3) +
  geom_segment(aes(x = 6.3, y = 48, xend = 6.6, yend = 43),
               arrow = arrow(length=unit(0.2,"cm"),type = "closed"),size=0.2,color="black") +
  geom_segment(aes(x = 8, y = 20, xend = 9.3, yend = 24),
               arrow = arrow(length=unit(0.2,"cm"),type = "closed"),size=0.2,color="black") +
  geom_segment(aes(x = 2.5, y = 40, xend = 2.7, yend = 35),
               arrow = arrow(length=unit(0.2,"cm"),type = "closed"),size=0.2,color="black")
# Settings for the brackets drawn below the x axis: `col` is their colour and
# `l2` the y position of the bracket lines (negative, i.e. below the panel).
# `l1` is not used in the code below.
col <- "black"
l1 <- -18
l2 <- -22
# clip = "off" lets the annotations be drawn outside the panel area; the bottom
# plot margin is enlarged to make room for them.
p + coord_cartesian(ylim = c(0,55), clip = "off") + # Make sure inside of plot is not changed
  theme(plot.margin=unit(c(0.5,0.5,1.7,0.5),"cm")) + # Make room
  annotate("text", x = 2.85, y = l2-2.5, label = "Bottom 99 percent",color=col) +
  geom_segment(x=0.4,y=l2,xend=5.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 8.2, y = l2-2.5, label = "Top 1 percent",color=col) +
  geom_segment(x=6,y=l2,xend=10.5,yend=l2,color=col,size=0.3) +
  annotate("text", x = 5.5, y = l2-8, label = "Annual taxable income")


